# ==============================================================================
# file name: 1-define-survey-profs.R
# date:	Jan 25, 2022
# author: Bernhard Clemm / Tiago Ventura
# purpose: implement binary measures of professionalism
# ==============================================================================

# Start from a clean workspace: remove every (non-hidden) object from the
# current environment. This does not detach packages, and no library() call is
# visible in this file, so the dplyr verbs and `%>%` used below must already be
# attached by whoever runs this script.
rm(list = ls())

# "For our primary analyses we define a survey professional (a) as someone who has on average
# more than 100 survey visits per active day (i.e., a day on which they were online). In the
# SM, we report results based on three alternative definitions, namely (b) a respondent with
# more than 50 percent of all visits to survey sites; (c) a respondent who spends more than
# 50 percent of all browsing time on survey sites, and (d) a respondent who meets any of the
# three conditions."

# FACEBOOK =====================================================================

# Browsing summaries for the Facebook sample (presumably one row per person
# and wave -- confirm against the script that writes this file).
people_visits_fb <- read.csv("data/browsing_summarized/people_visits_FB.csv")

## Aggregate over both waves ####

# Collapse to one row per person by summing every column from n_total through
# s_amazon_5_na. With na.rm = FALSE, a missing value in any of a person's rows
# makes that total NA rather than being dropped.
people_prof_fb <- people_visits_fb %>%
  group_by(person_id) %>%
  summarise(across(n_total:s_amazon_5_na, ~ sum(.x, na.rm = FALSE)))

## Measures 1-4: binary professionalism flags ####

# NOTE(review): the definition quoted in the file header says "more than"
# 100 visits / 50 percent, while the thresholds below are inclusive (>=).
# The thresholds are kept exactly as implemented -- confirm which is intended.
#
# Each flag is 1/0, or NA when the underlying ratio is NA/NaN (e.g. 0 / 0).
people_prof_fb <- people_prof_fb %>%
  mutate(
    # Measure 1: at least 100 survey visits per active day
    n_survey_per_day = n_survey / n_days_active,
    professional_1 = ifelse(n_survey_per_day >= 100, 1, 0),
    # Measure 2: at least 50 % of all visits are to survey sites
    prop_survey = n_survey / n_total,
    professional_2 = ifelse(prop_survey >= 0.5, 1, 0),
    # Measure 3: at least 50 % of total browsing time is on survey sites
    # (an overall share of the summed durations, not a per-day average)
    s_share_survey_per_day = s_survey_5_na / s_total_5_na,
    professional_3 = ifelse(s_share_survey_per_day >= .5, 1, 0),
    # Measure 4: any of the three above. Because `NA | TRUE` is TRUE, a single
    # positive flag is enough; the result is NA only when no flag is 1 and at
    # least one flag is NA.
    professional_all = ifelse(
      professional_1 == 1 | professional_2 == 1 | professional_3 == 1, 1, 0
    )
  )

## Export ####

# Keep only the identifier and the four professional_* flags.
people_prof_fb <- people_prof_fb %>%
  select(person_id, starts_with("professional"))

write.csv(
  people_prof_fb,
  "data/browsing_summarized/people_prof_FB.csv",
  row.names = FALSE
)

# LUCID ========================================================================

# Browsing summaries for the Lucid sample (presumably one row per person and
# wave -- confirm against the script that writes this file).
people_visits_lu <- read.csv("data/browsing_summarized/people_visits_LU.csv")

## Aggregate over all three waves ####

# Collapse to one row per person by summing every column from n_total through
# s_amazon_5_na. With na.rm = FALSE, a missing value in any of a person's rows
# makes that total NA rather than being dropped.
people_prof_lu <- people_visits_lu %>%
  group_by(person_id) %>%
  summarise(across(n_total:s_amazon_5_na, ~ sum(.x, na.rm = FALSE)))

## Measures 1-4: binary professionalism flags ####

# NOTE(review): the definition quoted in the file header says "more than"
# 100 visits / 50 percent, while the thresholds below are inclusive (>=).
# The thresholds are kept exactly as implemented -- confirm which is intended.
#
# Each flag is 1/0, or NA when the underlying ratio is NA/NaN (e.g. 0 / 0).
people_prof_lu <- people_prof_lu %>%
  mutate(
    # Measure 1: at least 100 survey visits per active day
    n_survey_per_day = n_survey / n_days_active,
    professional_1 = ifelse(n_survey_per_day >= 100, 1, 0),
    # Measure 2: at least 50 % of all visits are to survey sites
    prop_survey = n_survey / n_total,
    professional_2 = ifelse(prop_survey >= 0.5, 1, 0),
    # Measure 3: at least 50 % of total browsing time is on survey sites
    # (an overall share of the summed durations, not a per-day average)
    s_share_survey_per_day = s_survey_5_na / s_total_5_na,
    professional_3 = ifelse(s_share_survey_per_day >= .5, 1, 0),
    # Combined measure: any of the three above. Because `NA | TRUE` is TRUE, a
    # single positive flag is enough; the result is NA only when no flag is 1
    # and at least one flag is NA.
    professional_all = ifelse(
      professional_1 == 1 | professional_2 == 1 | professional_3 == 1, 1, 0
    )
  )

## Export ####

# Keep only the identifier and the four professional_* flags.
people_prof_lu <- people_prof_lu %>%
  select(person_id, starts_with("professional"))

write.csv(
  people_prof_lu,
  "data/browsing_summarized/people_prof_LU.csv",
  row.names = FALSE
)

# YOUGOV =======================================================================

# Browsing summaries for the YouGov sample. The anonymised identifier column
# id_anon is renamed to person_id to match the other samples. No aggregation
# step is applied here, so the input is presumably already one row per
# person -- confirm against the script that writes this file.
people_visits_yg <- read.csv(
  "data/browsing_summarized/people_visits_YG_anon.csv"
) %>%
  rename(person_id = id_anon)

## Measures 1-4: binary professionalism flags ####

# NOTE(review): the definition quoted in the file header says "more than"
# 100 visits / 50 percent, while the thresholds below are inclusive (>=).
# The thresholds are kept exactly as implemented -- confirm which is intended.
#
# Each flag is 1/0, or NA when the underlying ratio is NA/NaN (e.g. 0 / 0).
people_prof_yg <- people_visits_yg %>%
  mutate(
    # Measure 1: at least 100 survey visits per active day
    n_survey_per_day = n_survey / n_days_active,
    professional_1 = ifelse(n_survey_per_day >= 100, 1, 0),
    # Measure 2: at least 50 % of all visits are to survey sites
    prop_survey = n_survey / n_total,
    professional_2 = ifelse(prop_survey >= 0.5, 1, 0),
    # Measure 3: at least 50 % of total browsing time is on survey sites
    # (an overall share of the recorded durations, not a per-day average)
    s_share_survey_per_day = s_survey_5_na / s_total_5_na,
    professional_3 = ifelse(s_share_survey_per_day >= .5, 1, 0),
    # Measure 4: any of the three above. Because `NA | TRUE` is TRUE, a single
    # positive flag is enough; the result is NA only when no flag is 1 and at
    # least one flag is NA.
    professional_all = ifelse(
      professional_1 == 1 | professional_2 == 1 | professional_3 == 1, 1, 0
    )
  )

## Export ####

# Keep only the identifier and the four professional_* flags.
people_prof_yg <- people_prof_yg %>%
  select(person_id, starts_with("professional"))

write.csv(
  people_prof_yg,
  "data/browsing_summarized/people_prof_YG.csv",
  row.names = FALSE
)
